{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "b4ddc184-c386-46f4-ab8a-da0443a985b8",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "051fdb30-7b7e-42b1-a5c9-6bc1ff20d355",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.neighbors import KNeighborsRegressor"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "14c9d547-96b1-462f-8a90-3af6eb042c0b",
   "metadata": {},
   "source": [
    "# 基于KNN的房价估值"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "40cf05e5-8fec-403e-8dde-dd9362a42820",
   "metadata": {},
   "source": [
    "## 1、导入数据集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "b181dcec-9aba-4be9-a064-1690bcbb493f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>X1 transaction date</th>\n",
       "      <th>X2 house age</th>\n",
       "      <th>X3 distance to the nearest MRT station</th>\n",
       "      <th>X4 number of convenience stores</th>\n",
       "      <th>X5 latitude</th>\n",
       "      <th>X6 longitude</th>\n",
       "      <th>Y house price of unit area</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>No</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2012.916667</td>\n",
       "      <td>32.0</td>\n",
       "      <td>84.87882</td>\n",
       "      <td>10</td>\n",
       "      <td>24.98298</td>\n",
       "      <td>121.54024</td>\n",
       "      <td>37.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2012.916667</td>\n",
       "      <td>19.5</td>\n",
       "      <td>306.59470</td>\n",
       "      <td>9</td>\n",
       "      <td>24.98034</td>\n",
       "      <td>121.53951</td>\n",
       "      <td>42.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2013.583333</td>\n",
       "      <td>13.3</td>\n",
       "      <td>561.98450</td>\n",
       "      <td>5</td>\n",
       "      <td>24.98746</td>\n",
       "      <td>121.54391</td>\n",
       "      <td>47.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2013.500000</td>\n",
       "      <td>13.3</td>\n",
       "      <td>561.98450</td>\n",
       "      <td>5</td>\n",
       "      <td>24.98746</td>\n",
       "      <td>121.54391</td>\n",
       "      <td>54.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2012.833333</td>\n",
       "      <td>5.0</td>\n",
       "      <td>390.56840</td>\n",
       "      <td>5</td>\n",
       "      <td>24.97937</td>\n",
       "      <td>121.54245</td>\n",
       "      <td>43.1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    X1 transaction date  X2 house age  X3 distance to the nearest MRT station  \\\n",
       "No                                                                              \n",
       "1           2012.916667          32.0                                84.87882   \n",
       "2           2012.916667          19.5                               306.59470   \n",
       "3           2013.583333          13.3                               561.98450   \n",
       "4           2013.500000          13.3                               561.98450   \n",
       "5           2012.833333           5.0                               390.56840   \n",
       "\n",
       "    X4 number of convenience stores  X5 latitude  X6 longitude  \\\n",
       "No                                                               \n",
       "1                                10     24.98298     121.54024   \n",
       "2                                 9     24.98034     121.53951   \n",
       "3                                 5     24.98746     121.54391   \n",
       "4                                 5     24.98746     121.54391   \n",
       "5                                 5     24.97937     121.54245   \n",
       "\n",
       "    Y house price of unit area  \n",
       "No                              \n",
       "1                         37.9  \n",
       "2                         42.2  \n",
       "3                         47.3  \n",
       "4                         54.8  \n",
       "5                         43.1  "
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "real_estate = pd.read_excel('../dataset/Real estate valuation data set.xlsx', index_col='No')\n",
    "real_estate.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "fb2e73ad-25ca-434d-a1a0-f4525e04d9c2",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = real_estate.iloc[:, :-1].copy()\n",
    "target = real_estate.iloc[:, -1].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "ff81ce3a-4213-4692-b45b-8ac20948060c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>X1 transaction date</th>\n",
       "      <th>X2 house age</th>\n",
       "      <th>X3 distance to the nearest MRT station</th>\n",
       "      <th>X4 number of convenience stores</th>\n",
       "      <th>X5 latitude</th>\n",
       "      <th>X6 longitude</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>No</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2012.916667</td>\n",
       "      <td>32.0</td>\n",
       "      <td>84.87882</td>\n",
       "      <td>10</td>\n",
       "      <td>24.98298</td>\n",
       "      <td>121.54024</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2012.916667</td>\n",
       "      <td>19.5</td>\n",
       "      <td>306.59470</td>\n",
       "      <td>9</td>\n",
       "      <td>24.98034</td>\n",
       "      <td>121.53951</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2013.583333</td>\n",
       "      <td>13.3</td>\n",
       "      <td>561.98450</td>\n",
       "      <td>5</td>\n",
       "      <td>24.98746</td>\n",
       "      <td>121.54391</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2013.500000</td>\n",
       "      <td>13.3</td>\n",
       "      <td>561.98450</td>\n",
       "      <td>5</td>\n",
       "      <td>24.98746</td>\n",
       "      <td>121.54391</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2012.833333</td>\n",
       "      <td>5.0</td>\n",
       "      <td>390.56840</td>\n",
       "      <td>5</td>\n",
       "      <td>24.97937</td>\n",
       "      <td>121.54245</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    X1 transaction date  X2 house age  X3 distance to the nearest MRT station  \\\n",
       "No                                                                              \n",
       "1           2012.916667          32.0                                84.87882   \n",
       "2           2012.916667          19.5                               306.59470   \n",
       "3           2013.583333          13.3                               561.98450   \n",
       "4           2013.500000          13.3                               561.98450   \n",
       "5           2012.833333           5.0                               390.56840   \n",
       "\n",
       "    X4 number of convenience stores  X5 latitude  X6 longitude  \n",
       "No                                                              \n",
       "1                                10     24.98298     121.54024  \n",
       "2                                 9     24.98034     121.53951  \n",
       "3                                 5     24.98746     121.54391  \n",
       "4                                 5     24.98746     121.54391  \n",
       "5                                 5     24.97937     121.54245  "
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "bad73f20-7127-44e0-98cd-e181c9c53cc5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "No\n",
       "1      37.9\n",
       "2      42.2\n",
       "3      47.3\n",
       "4      54.8\n",
       "5      43.1\n",
       "       ... \n",
       "410    15.4\n",
       "411    50.0\n",
       "412    40.6\n",
       "413    52.5\n",
       "414    63.9\n",
       "Name: Y house price of unit area, Length: 414, dtype: float64"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "target"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d3e57aac-7c04-4137-a0c8-b07a1f4a2312",
   "metadata": {},
   "source": [
    "## 2、划分数据集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "f2a2e825-de01-47ce-9b2c-8b6a4528776f",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=44, random_state=10)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "03fd602f-442d-4cdf-9554-4f4e9dece2fe",
   "metadata": {},
   "source": [
    "## 3、模型训练"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "a43ebb24-5329-4284-97ca-2b50b5fe976a",
   "metadata": {},
   "outputs": [],
   "source": [
    "knn = KNeighborsRegressor()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "3bf0a6ea-4c93-40fd-a329-850076d21d27",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-5 {color: black;}#sk-container-id-5 pre{padding: 0;}#sk-container-id-5 div.sk-toggleable {background-color: white;}#sk-container-id-5 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-5 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-5 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-5 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-5 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-5 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-5 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-5 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-5 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-5 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-5 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-5 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-5 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-5 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-5 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-5 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-5 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-5 div.sk-item {position: relative;z-index: 1;}#sk-container-id-5 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-5 div.sk-item::before, #sk-container-id-5 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-5 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-5 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-5 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-5 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-5 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-5 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-5 div.sk-label-container {text-align: center;}#sk-container-id-5 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-5 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-5\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>KNeighborsRegressor()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" checked><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">KNeighborsRegressor</label><div class=\"sk-toggleable__content\"><pre>KNeighborsRegressor()</pre></div></div></div></div></div>"
      ],
      "text/plain": [
       "KNeighborsRegressor()"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "knn.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fa860206-bb9a-4a67-a3ba-2df454cfa8d9",
   "metadata": {},
   "source": [
    "## 4、模型评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "e998c835-837b-45db-9e0c-329d8c9e018f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8591620142312315"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "knn.score(X_test, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2cc4572d-cad5-439d-918f-64135c26adc5",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
